from pyspark import SparkConf, SparkContext

if __name__ == '__main__':
    # Run Spark locally, using as many worker threads as there are cores.
    conf = SparkConf().setAppName("test").setMaster("local[*]")

    # The context manager stops the SparkContext on exit, so its resources
    # are released even if the job below raises.
    with SparkContext(conf=conf) as sc:
        # Sample data, placed in a single partition.
        rdd = sc.parallelize([3, 1, 3, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 8, 9], 1)

        # Sort and take the top n: takeOrdered sorts ascending by key, so
        # negating each value yields the 5 largest elements, largest first.
        print(rdd.takeOrdered(5, lambda x: -x))
